X64: Use Struct for ELF Ehdr and Phdr headers #1694

ubaidsk · 2023-04-10T18:52:12Z

Also, towards #1485.

PS: This is a work in progress, hence the tests might not work.

ubaidsk · 2023-04-10T18:54:35Z

In this PR, I am attempting to decouple the assembly instructions from the ELF headers. @certik, could you please review and share whether we are heading in the desired direction?

ubaidsk · 2023-04-10T18:59:05Z

Also, using the approach in this PR, how shall we support generating assembly text format for the Elf headers?

certik

I think this looks good. Don't forget to remove the tmp binary.

certik · 2023-04-11T00:59:31Z

src/libasr/codegen/x86_assembler.cpp

+Elf64_Phdr get_program_header(X86Assembler &a) {
+    Elf64_Phdr p;
+    p.type = 1;
+    p.flags = 4;
+    p.offset = sizeof(Elf64_Phdr);
+    p.vaddr = a.origin();
+    p.paddr = a.origin();
+    p.filesz = sizeof(Elf64_Phdr);
+    p.memsz = sizeof(Elf64_Phdr);
+    p.align = 0x1000;
+    return p;
+}
+
+Elf64_Phdr get_text_segment(X86Assembler &a, Elf64_Phdr &p_program) {
+    Elf64_Phdr p;
+    p.type = 1;
+    p.flags = 5;
+    p.offset = p_program.offset + sizeof(p);
+    p.vaddr = a.origin() + p.offset;
+    p.paddr = a.origin() + p.offset;
+    p.filesz = sizeof(Elf64_Phdr);
+    p.memsz = sizeof(Elf64_Phdr);
+    p.align = 0x1000;
+    return p;
+}
+
+Elf64_Phdr get_data_segment(X86Assembler &a, Elf64_Phdr &p_text_seg) {
+    Elf64_Phdr p;
+    p.type = 1;
+    p.flags = 6;
+    p.offset = p_text_seg.offset + sizeof(p_text_seg);
+    p.vaddr = a.origin() + p.offset;
+    p.paddr = a.origin() + p.offset;
+    p.filesz = sizeof(Elf64_Phdr);
+    p.memsz = sizeof(Elf64_Phdr);
+    p.align = 0x1000;
+    return p;
+}


These functions only use a.origin(), so I would just pass origin as an integer parameter, not the whole X86Assembler.

certik · 2023-04-11T00:59:50Z

src/libasr/codegen/x86_assembler.cpp

+    e.type = 2;
+    e.machine = 0x3e;
+    e.version = 1;
+    e.entry = a.get_defined_symbol("_start").value;


I would pass a.get_defined_symbol("_start").value as an argument, not a.

certik · 2023-04-11T01:04:00Z

src/libasr/codegen/x86_assembler.cpp

+    out.write((const char*) m_code.p, m_code.size());
+
+    out.close();
+}


I would keep this function outside of X86Assembler. Eventually once we add Arm64Assembler, we will figure out how to generalize this function to also work for ELF ARM binaries.

Also I would rename it to something like create_elf64_x86_binary and I would pass m_code as an argument.

I would make the function return a string.

The caller will then save the string to a file.

> I would make the function return a string.

I have currently used a Vec<uint8_t>. Please let me know whether that is fine or whether I should change it.

ubaidsk · 2023-04-13T18:12:44Z

Example:

(lp) lpython$ cat integration_tests/test_complex_01.py 
from lpython import i32, i64, f32, f64, c32, c64

def test_real_imag():
    x: c64
    x = c64(2) + 3j
    a: f64
    b: f64
    eps: f64
    eps = 1e-12
    a = x.real
    b = x.imag
    assert abs(a - 2.0) <= eps
    assert abs(b - 3.0) <= eps

    print(x)

def test_complex():
    x: c64
    x = complex(4.5, 6.7)
    eps: f64
    eps = 1e-12
    assert abs(x.real - 4.5) <= eps
    assert abs(x.imag - 6.7) <= eps

    x = complex(-4, 2)
    assert abs(x.real - (-4.0)) <= eps
    assert abs(x.imag - 2.0) <= eps

    x = complex(4, 7.89)
    assert abs(x.real - 4.0) <= eps
    assert abs(x.imag - 7.89) <= eps

    x = complex(5.6, 0)
    assert abs(x.real - 5.6) <= eps
    assert abs(x.imag - 0.0) <= eps

    a: f64
    a = 534.6
    x = complex(a, -a) # (f64, f64)

    assert abs(x.real - 534.60000000000002274) <= eps
    assert abs(x.imag - (-534.60000000000002274)) <= eps

    a2: f32
    a2 = -f32(423.5430806348152437)
    a3: f32
    a3 = f32(34.5)
    x2: c32
    x2 = c32(complex(a2, a3)) # (f32, f32)

    assert f64(abs(x2.imag - f32(34.5))) <= eps

    i1: i32
    i1 = -5
    i2: i64
    i2 = -i64(6)

    x = complex(a3, a) # (f32, f64)
    x = complex(a, a3) # (f64, f32)
    x = complex(i1, i2) # (i32, i64)
    x = complex(i1, -i1) # (i32, i32)
    x = complex(-i2, -i2) # (i64, i64)
    x = complex(i2, -i1) # (i64, i32)

    print(x)

def test_complex_unary_minus():
    c: c32
    c = c32(complex(3, 4.5))
    _c: c32
    _c = -c
    assert abs(f64(_c.real) - (-3.0)) <= 1e-12
    assert abs(f64(_c.imag) - (-4.5)) <= 1e-12
    _c = c32(complex(5, -78))
    _c = -_c
    assert abs(f64(_c.real) - (-5.0)) <= 1e-12
    assert abs(f64(_c.imag) - 78.0) <= 1e-12
    c2: c64
    c2 = complex(-4.5, -7.8)
    c2 = -c2
    assert abs(c2.real - 4.5) <= 1e-12
    assert abs(c2.imag - 7.8) <= 1e-12
    c2 = c64(3) + 4j
    c2 = -c2
    assert abs(c2.real - (-3.0)) <= 1e-12
    assert abs(c2.imag - (-4.0)) <= 1e-12

    print(c, _c, c2)

def test_complex_not():
    c: c32
    c = c32(complex(4, 5))
    b: bool
    b = not c
    assert not b

    c2: c64
    c2 = complex(0, 0)
    b = not c2
    assert b

    print(c,c2, b)

def check():
    test_real_imag()
    test_complex()
    test_complex_unary_minus()
    test_complex_not()

check()

(lp) lpython$ lpython integration_tests/test_complex_01.py --backend wasm -o tmp.out
(lp) lpython$ wasmtime tmp.out
(2.000000000,3.000000000)
(-6.000000000,5.000000000)
(3.000000000,4.50000000) (-5.000000000,78.000000000) (-3.000000000,-4.000000000)
(4.000000000,5.000000000) (0.000000000,0.000000000) 1

(lp) lpython$ lpython integration_tests/test_complex_01.py --backend wasm_x64 -o tmp2.out > tmp2.asm
(lp) lpython$ ./tmp2.out 
(2.000000000,3.000000000)
(-6.000000000,5.000000000)
(3.000000000,4.50000000) (-5.000000000,78.000000000) (-3.000000000,-4.000000000)
(4.000000000,5.000000000) (0.000000000,0.000000000) 1

(wasm_asm) lpython$ nasm -fbin tmp2.asm && chmod +x tmp2
(wasm_asm) lpython$ ./tmp2
(2.000000000,3.000000000)
(-6.000000000,5.000000000)
(3.000000000,4.50000000) (-5.000000000,78.000000000) (-3.000000000,-4.000000000)
(4.000000000,5.000000000) (0.000000000,0.000000000) 1

ubaidsk · 2023-04-13T18:17:41Z

This is ready. Please review and share feedback.

certik · 2023-04-14T04:03:12Z

src/libasr/codegen/x86_assembler.cpp

+
+    for (auto b:a.get_machine_code()) {
+        bin.push_back(al, b);
+    }


Is this copying the (potentially big) binary code byte by byte?

Yes, it is copying the binary code byte by byte. Another approach is to return only the ELF and Phdr headers from create_elf64_x86_binary() (which we could then rename to create_elf64_x86_header()).

For example:

void X86Assembler::save_binary64(const std::string &filename) { Vec<uint8_t> header = create_elf64_x86_header(m_al, *this); { std::ofstream out; out.open(filename); out.write((const char*) header.p, header.size()); out.write((const char*) m_code.p, m_code.size()); out.close(); } #ifdef LFORTRAN_LINUX std::string mode = "0755"; int mod = strtol(mode.c_str(), 0, 8); if (chmod(filename.c_str(),mod) < 0) { throw AssemblerError("chmod failed"); } #endif }

I have now updated it to return only the header.

certik · 2023-04-14T04:03:56Z

I think it looks ok, but I am a bit worried about performance. Can you benchmark this compared to master?

ubaidsk · 2023-04-14T06:12:13Z

Please find below the benchmark against main. The benchmark used is the one from #1222 (comment) (N = 10000).

generate benchmark:

python examples/expr2.py > bench.py

main branch:

(lp) lpython$ time lpython --backend=wasm_x64 bench.py -o bench_wasm_x64.x

real    0m12.138s
user    0m12.079s
sys     0m0.060s

This PR:

(lp) lpython$ time lpython --backend=wasm_x64 bench.py -o bench_wasm_x64.x

real    0m12.963s
user    0m12.771s
sys     0m0.084s

Relative: (real time considered here)

main: 1.0
this pr: 1.0678

ubaidsk · 2023-04-14T06:52:50Z

The timings seem similar now. (The timings on both main and this branch fluctuate considerably; the following runs are the best of 3 to 4 executions.)

Main:

(lp) lpython$ time lpython --backend=wasm_x64 bench.py -o bench_wasm_x64.x

real    0m11.905s
user    0m11.842s
sys     0m0.061s

This PR:

(lp) lpython$ time lpython --backend=wasm_x64 bench.py -o bench_wasm_x64.x

real    0m11.877s
user    0m11.802s
sys     0m0.065s

ubaidsk · 2023-04-14T06:53:03Z

This is ready.

certik · 2023-04-14T13:49:03Z

Did you benchmark LPython compiled in Release mode?

ubaidsk · 2023-04-14T14:37:00Z

> Did you benchmark LPython compiled in Release mode?

Sorry, it is in debug mode. Do we need release mode?

From #1222 (comment), it seems that the timing difference is smaller in release mode. I therefore thought that debug mode would enlarge any timing difference and so act as a stricter check.

certik · 2023-04-14T15:18:42Z

We should only do benchmarks in Release mode, as in Debug mode there are all kinds of asserts and checks that are not executed in Release mode, so they are not relevant.

src/libasr/codegen/x86_assembler.cpp

certik

I think this looks good now. Thanks for this refactoring!

ubaidsk · 2023-04-14T15:28:05Z

> I think this looks good now. Thanks for this refactoring!

Shall we merge before release benchmarking or after release benchmarking?

Co-authored-by: Ondřej Čertík <[email protected]>

ubaidsk · 2023-04-14T16:16:48Z

On this PR Branch, using the following build script:

#!/usr/bin/env bash

set -e
set -x

cmake \
    -DCMAKE_BUILD_TYPE=Release \
    -DWITH_LLVM=yes \
    -DLPYTHON_BUILD_ALL=yes \
    -DWITH_STACKTRACE=no \
    -DWITH_RUNTIME_STACKTRACE=no \
    -DWITH_LSP=no \
    -DWITH_LFORTRAN_BINARY_MODFILES=no \
    -DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH_LPYTHON;$CONDA_PREFIX" \
    -DCMAKE_INSTALL_PREFIX=`pwd`/inst \
    .
cmake --build . -j16 --target install

I am receiving the following timing values on AMD Ryzen 5 2500U with Radeon Vega Mobile Gfx @1.600 GHz, Ubuntu 22.04.4 LTS:

(lp) lpython$ time lpython bench.py --backend wasm_x86 -o tmp

real    0m3.393s
user    0m3.340s
sys     0m0.053s
(lp) lpython$ time lpython bench.py --backend x86 -o tmp

real    0m2.736s
user    0m2.683s
sys     0m0.054s
(lp) lpython$ time lpython bench.py --backend wasm_x64 -o tmp

real    0m3.457s
user    0m3.417s
sys     0m0.040s
(lp) lpython$ time lpython bench.py --backend llvm -o tmp

real    0m30.106s
user    0m29.968s
sys     0m0.129s
(lp) ubaid@ubaid-Lenovo-ideapad-330-15ARR:~/Desktop/Open-Source/lpython$ time python bench.py 
249975005

real    0m0.499s
user    0m0.374s
sys     0m0.125s
(lp) ubaid@ubaid-Lenovo-ideapad-330-15ARR:~/Desktop/Open-Source/lpython$

These values seem to differ significantly from the release mode values shared at #1222 (comment).

Is the build script used to build in release mode as expected or am I missing something?

certik · 2023-04-14T16:21:14Z

Measure master and this PR in Release mode and let's see. I can do the same. If the benchmarks in Release mode look good, we can merge.

ubaidsk · 2023-04-14T16:30:50Z

Compiled on ubuntu 22.04, using

./build0.sh
cmake .
cmake --build . -j16

On main branch:

(lp) lpython$ time lpython bench.py --backend wasm_x64 -o tmp.out

real    0m2.258s
user    0m2.218s
sys     0m0.041s

On this PR branch:

(lp) lpython$ time lpython bench.py --backend wasm_x64 -o tmp.out

real    0m1.588s
user    0m1.508s
sys     0m0.080s

I have benchmarked only the wasm_x64 backend for now, as this PR changes the wasm_x64 backend. We can also benchmark other backends.

certik · 2023-04-14T16:52:03Z

Ok. I think it's good enough. Go ahead and merge this.

ubaidsk · 2023-04-14T16:52:39Z

Thank you for the approval.

ubaidsk · 2023-04-14T17:05:56Z

I just noticed that I also had another branch locally with a slightly different approach:

Vec<uint8_t> create_elf64_x86_binary(Allocator &al, X86Assembler &a) {
    const int E_IDX = 0;
    const int PROGRAM_IDX = E_IDX + sizeof(Elf64_Ehdr);
    const int TEXT_SEG_IDX = PROGRAM_IDX + sizeof(Elf64_Phdr);
    const int DATA_SEG_IDX = TEXT_SEG_IDX + sizeof(Elf64_Phdr);
    const int TOTAL_HEADER_SIZE = DATA_SEG_IDX + sizeof(Elf64_Phdr);

    Vec<uint8_t> binary;
    binary.resize(al, TOTAL_HEADER_SIZE);

    Elf64_Ehdr* e = (Elf64_Ehdr*)(binary.p + E_IDX);
    Elf64_Phdr* p_program = (Elf64_Phdr*)(binary.p + PROGRAM_IDX);
    Elf64_Phdr* p_text_seg = (Elf64_Phdr*)(binary.p + TEXT_SEG_IDX);
    Elf64_Phdr* p_data_seg = (Elf64_Phdr*)(binary.p + DATA_SEG_IDX);

    set_header(a, e);
    set_program_header(a, p_program);
    set_text_segment(a, p_program, p_text_seg);
    set_data_segment(a, p_text_seg, p_data_seg);

    align_by_byte(al, binary, 0x1000);

    const int PROGRAM_HEADER_SIZE = binary.size();

    e->entry = a.get_defined_symbol("_start").value + PROGRAM_HEADER_SIZE;

    p_program->filesz = PROGRAM_HEADER_SIZE;
    p_program->memsz = p_program->filesz;

    p_text_seg->offset = p_program->offset + p_program->filesz;
    p_text_seg->vaddr = a.origin() + p_text_seg->offset + PROGRAM_HEADER_SIZE;
    p_text_seg->paddr = p_text_seg->vaddr;

    p_data_seg->offset = p_text_seg->offset + p_text_seg->filesz;
    p_data_seg->vaddr = a.origin() + p_data_seg->offset + PROGRAM_HEADER_SIZE;
    p_data_seg->paddr = p_data_seg->vaddr;

    append_bytes(al, a.get_machine_code(), binary);
    return binary;
}

where set_program_header() (and, similarly, the other functions) is implemented as:

void set_program_header(X86Assembler &a, Elf64_Phdr* p) {
    p->type = 1;
    p->flags = 4;
    p->offset = 0;
    p->vaddr = a.origin();
    p->paddr = a.origin();
    p->filesz = sizeof(Elf64_Ehdr) + 3 * sizeof(Elf64_Phdr);
    p->memsz = p->filesz;
    p->align = 0x1000;
}

ubaidsk · 2023-04-14T17:06:59Z

Sorry, I should have shared this approach (#1694 (comment)) earlier. I forgot about it while working on the new branch.

certik · 2023-04-14T18:49:03Z

The merged PR is fine with me. If you want to change it, go ahead and submit a PR.

certik reviewed Apr 11, 2023

View reviewed changes

ubaidsk force-pushed the elf_structs branch 2 times, most recently from 43021ba to 88aa79b Compare April 13, 2023 17:57

ubaidsk force-pushed the elf_structs branch from 88aa79b to e455564 Compare April 13, 2023 18:13

ubaidsk marked this pull request as ready for review April 13, 2023 18:14

ubaidsk requested a review from certik April 13, 2023 18:15

ubaidsk added the labels "ready for review" (PRs that are ready for review) and "review_needed" (The PR needs a review to ensure the features implemented are as expected/desired) Apr 13, 2023

certik reviewed Apr 14, 2023

View reviewed changes

ubaidsk added 10 commits April 14, 2023 12:05

X64: Define elf ehdr, phdr structs for x86 and x64

b766d8a

X64: Define header constructing functions

dab6068

WASM: X64: Emit alignment bytes before segment end

bd19300

WASM_X64: Define and use common get_seg_header()

a05b71c

WASM_X64: Support elf headers, footer in asm text

150a9c8

X86Assembler: Isolate get_asm() and save_bin() for 32 and 64bit

9ffc308

WASM: Fix neg val assign to global_var_idx

06ed809

WASM_X64: Remove unused functions

f12e42c

WASM_X64: Create and return only header binary

14b1197

WASM_X64: Pass only required params to create_elf64_x86_header()

0630d23

ubaidsk force-pushed the elf_structs branch from 89ea081 to 0630d23 Compare April 14, 2023 06:50

ubaidsk requested a review from certik April 14, 2023 06:53

certik reviewed Apr 14, 2023

View reviewed changes

src/libasr/codegen/x86_assembler.cpp Outdated Show resolved Hide resolved

certik approved these changes Apr 14, 2023

View reviewed changes

WASM_X64: Assign mod value directly as constant

1a99f9a

Co-authored-by: Ondřej Čertík <[email protected]>

ubaidsk merged commit 57ebbc8 into lcompilers:main Apr 14, 2023

ubaidsk deleted the elf_structs branch April 14, 2023 16:52

X64: Use Struct for ELF Ehdr and Phdr headers #1694

X64: Use Struct for ELF Ehdr and Phdr headers #1694

Uh oh!

Conversation

ubaidsk commented Apr 10, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

ubaidsk commented Apr 10, 2023

Uh oh!

ubaidsk commented Apr 10, 2023

Uh oh!

certik left a comment

Choose a reason for hiding this comment

Uh oh!

certik Apr 11, 2023

Choose a reason for hiding this comment

Uh oh!

certik Apr 11, 2023

Choose a reason for hiding this comment

Uh oh!

certik Apr 11, 2023

Choose a reason for hiding this comment

Uh oh!

ubaidsk Apr 13, 2023

Choose a reason for hiding this comment

Uh oh!

ubaidsk commented Apr 13, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

ubaidsk commented Apr 13, 2023

Uh oh!

certik Apr 14, 2023

Choose a reason for hiding this comment

Uh oh!

ubaidsk Apr 14, 2023

Choose a reason for hiding this comment

Uh oh!

ubaidsk Apr 14, 2023

Choose a reason for hiding this comment

Uh oh!

certik commented Apr 14, 2023

Uh oh!

ubaidsk commented Apr 14, 2023

Uh oh!

ubaidsk commented Apr 14, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

ubaidsk commented Apr 14, 2023

Uh oh!

certik commented Apr 14, 2023

Uh oh!

ubaidsk commented Apr 14, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

certik commented Apr 14, 2023

Uh oh!

Uh oh!

certik left a comment

Choose a reason for hiding this comment

Uh oh!

ubaidsk commented Apr 14, 2023

Uh oh!

ubaidsk commented Apr 14, 2023

Uh oh!

certik commented Apr 14, 2023

Uh oh!

ubaidsk commented Apr 14, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

certik commented Apr 14, 2023

Uh oh!

ubaidsk commented Apr 14, 2023

Uh oh!

ubaidsk commented Apr 14, 2023

Uh oh!

ubaidsk commented Apr 14, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

certik commented Apr 14, 2023

Uh oh!

ubaidsk commented Apr 10, 2023 •

edited

Loading

ubaidsk commented Apr 13, 2023 •

edited

Loading

ubaidsk commented Apr 14, 2023 •

edited

Loading

ubaidsk commented Apr 14, 2023 •

edited

Loading

ubaidsk commented Apr 14, 2023 •

edited

Loading

ubaidsk commented Apr 14, 2023 •

edited

Loading